{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-28T08:20:58.186940Z",
     "iopub.status.busy": "2025-09-28T08:20:58.186626Z",
     "iopub.status.idle": "2025-09-28T08:21:02.158617Z",
     "shell.execute_reply": "2025-09-28T08:21:02.157802Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from river import datasets\n",
    "from deep_river import regression\n",
    "from torch import nn\n",
    "from river import compose\n",
    "from river import preprocessing\n",
    "from itertools import islice\n",
    "from pprint import pprint\n",
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-28T08:21:02.162015Z",
     "iopub.status.busy": "2025-09-28T08:21:02.161703Z",
     "iopub.status.idle": "2025-09-28T08:21:02.167752Z",
     "shell.execute_reply": "2025-09-28T08:21:02.167025Z"
    }
   },
   "outputs": [],
   "source": [
    "class MyModule(nn.Module):\n",
    "    def __init__(self, n_features):\n",
    "        super(MyModule, self).__init__()\n",
    "        self.dense0 = nn.Linear(n_features, 5)\n",
    "        self.nonlin = nn.ReLU()\n",
    "        self.dense1 = nn.Linear(5, 1)\n",
    "        self.softmax = nn.Softmax(dim=-1)\n",
    "\n",
    "    def forward(self, X, **kwargs):\n",
    "        X = self.nonlin(self.dense0(X))\n",
    "        X = self.nonlin(self.dense1(X))\n",
    "        X = self.softmax(X)\n",
    "        return X\n",
    "\n",
    "\n",
    "def batcher(iterable, batch_size):\n",
    "    iterator = iter(iterable)\n",
    "    while batch := list(islice(iterator, batch_size)):\n",
    "        yield batch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-28T08:21:02.170890Z",
     "iopub.status.busy": "2025-09-28T08:21:02.170626Z",
     "iopub.status.idle": "2025-09-28T08:21:02.177669Z",
     "shell.execute_reply": "2025-09-28T08:21:02.176767Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'clouds': 75,\n",
      " 'description': 'light rain',\n",
      " 'humidity': 81,\n",
      " 'moment': datetime.datetime(2016, 4, 1, 0, 0, 7),\n",
      " 'pressure': 1017.0,\n",
      " 'station': 'metro-canal-du-midi',\n",
      " 'temperature': 6.54,\n",
      " 'wind': 9.3}\n",
      "Number of available bikes: 1\n"
     ]
    }
   ],
   "source": [
    "dataset = datasets.Bikes()\n",
    "\n",
    "for x, y in dataset:\n",
    "    pprint(x)\n",
    "    print(f\"Number of available bikes: {y}\")\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-28T08:21:02.180926Z",
     "iopub.status.busy": "2025-09-28T08:21:02.180640Z",
     "iopub.status.idle": "2025-09-28T08:21:03.085669Z",
     "shell.execute_reply": "2025-09-28T08:21:03.085019Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><div class=\"river-component river-pipeline\"><details class=\"river-component river-estimator\"><summary class=\"river-summary\"><pre class=\"river-estimator-name\">['clouds', [...]</pre></summary><code class=\"river-estimator-params\">Select (\n",
       "  clouds\n",
       "  humidity\n",
       "  pressure\n",
       "  temperature\n",
       "  wind\n",
       ")\n",
       "</code></details><details class=\"river-component river-estimator\"><summary class=\"river-summary\"><pre class=\"river-estimator-name\">Regressor</pre></summary><code class=\"river-estimator-params\">Regressor (\n",
       "  module=MyModule(\n",
       "  (dense0): Linear(in_features=5, out_features=5, bias=True)\n",
       "  (nonlin): ReLU()\n",
       "  (dense1): Linear(in_features=5, out_features=1, bias=True)\n",
       "  (softmax): Softmax(dim=-1)\n",
       ")\n",
       "  loss_fn=\"mse\"\n",
       "  optimizer_fn=\"sgd\"\n",
       "  lr=0.001\n",
       "  is_feature_incremental=False\n",
       "  device=\"cpu\"\n",
       "  seed=42\n",
       ")\n",
       "</code></details></div><style scoped>\n",
       ".river-estimator {\n",
       "    padding: 1em;\n",
       "    border-style: solid;\n",
       "    background: white;\n",
       "    max-width: max-content;\n",
       "}\n",
       "\n",
       ".river-pipeline {\n",
       "    display: flex;\n",
       "    flex-direction: column;\n",
       "    align-items: center;\n",
       "    background: linear-gradient(#000, #000) no-repeat center / 1.5px 100%;\n",
       "}\n",
       "\n",
       ".river-union {\n",
       "    display: flex;\n",
       "    flex-direction: row;\n",
       "    align-items: center;\n",
       "    justify-content: center;\n",
       "    padding: 1em;\n",
       "    border-style: solid;\n",
       "    background: white;\n",
       "}\n",
       "\n",
       ".river-wrapper {\n",
       "    display: flex;\n",
       "    flex-direction: column;\n",
       "    align-items: center;\n",
       "    justify-content: center;\n",
       "    padding: 1em;\n",
       "    border-style: solid;\n",
       "    background: white;\n",
       "}\n",
       "\n",
       ".river-wrapper > .river-estimator {\n",
       "    margin-top: 1em;\n",
       "}\n",
       "\n",
       "/* Vertical spacing between steps */\n",
       "\n",
       ".river-component + .river-component {\n",
       "    margin-top: 2em;\n",
       "}\n",
       "\n",
       ".river-union > .river-estimator {\n",
       "    margin-top: 0;\n",
       "}\n",
       "\n",
       ".river-union > .river-component {\n",
       "    margin-top: 0;\n",
       "}\n",
       "\n",
       ".river-union > .pipeline {\n",
       "    margin-top: 0;\n",
       "}\n",
       "\n",
       "/* Spacing within a union of estimators */\n",
       "\n",
       ".river-union > .river-component + .river-component {\n",
       "    margin-left: 1em;\n",
       "}\n",
       "\n",
       "/* Typography */\n",
       "\n",
       ".river-estimator-params {\n",
       "    display: block;\n",
       "    white-space: pre-wrap;\n",
       "    font-size: 110%;\n",
       "    margin-top: 1em;\n",
       "}\n",
       "\n",
       ".river-estimator > .river-estimator-params,\n",
       ".river-wrapper > .river-details > river-estimator-params {\n",
       "    background-color: white !important;\n",
       "}\n",
       "\n",
       ".river-wrapper > .river-details {\n",
       "    margin-bottom: 1em;\n",
       "}\n",
       "\n",
       ".river-estimator-name {\n",
       "    display: inline;\n",
       "    margin: 0;\n",
       "    font-size: 110%;\n",
       "}\n",
       "\n",
       "/* Toggle */\n",
       "\n",
       ".river-summary {\n",
       "    display: flex;\n",
       "    align-items:center;\n",
       "    cursor: pointer;\n",
       "}\n",
       "\n",
       ".river-summary > div {\n",
       "    width: 100%;\n",
       "}\n",
       "</style></div>"
      ],
      "text/plain": [
       "Pipeline (\n",
       "  Select (\n",
       "    clouds\n",
       "    humidity\n",
       "    pressure\n",
       "    temperature\n",
       "    wind\n",
       "  ),\n",
       "  Regressor (\n",
       "    module=MyModule(\n",
       "    (dense0): Linear(in_features=5, out_features=5, bias=True)\n",
       "    (nonlin): ReLU()\n",
       "    (dense1): Linear(in_features=5, out_features=1, bias=True)\n",
       "    (softmax): Softmax(dim=-1)\n",
       "  )\n",
       "    loss_fn=\"mse\"\n",
       "    optimizer_fn=\"sgd\"\n",
       "    lr=0.001\n",
       "    is_feature_incremental=False\n",
       "    device=\"cpu\"\n",
       "    seed=42\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset = datasets.Bikes()\n",
    "\n",
    "model_pipeline = compose.Select(\n",
    "    \"clouds\", \"humidity\", \"pressure\", \"temperature\", \"wind\"\n",
    ")\n",
    "model_pipeline |= regression.Regressor(\n",
    "    module=MyModule(5), loss_fn=\"mse\", optimizer_fn=\"sgd\"\n",
    ")\n",
    "model_pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-09-28T08:21:03.088186Z",
     "iopub.status.busy": "2025-09-28T08:21:03.087872Z",
     "iopub.status.idle": "2025-09-28T08:21:06.406835Z",
     "shell.execute_reply": "2025-09-28T08:21:06.406183Z"
    }
   },
   "outputs": [],
   "source": [
    "y_trues = []\n",
    "y_preds = []\n",
    "for batch in batcher(dataset.take(5000), 5):\n",
    "    x, y = zip(*batch)\n",
    "    x = pd.DataFrame(x)\n",
    "    y_trues.extend(y)\n",
    "    y_preds.extend(model_pipeline.predict_many(X=x).values)\n",
    "    model_pipeline.learn_many(X=x, y=pd.Series(y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false,
    "execution": {
     "iopub.execute_input": "2025-09-28T08:21:06.409331Z",
     "iopub.status.busy": "2025-09-28T08:21:06.409119Z",
     "iopub.status.idle": "2025-09-28T08:21:06.415885Z",
     "shell.execute_reply": "2025-09-28T08:21:06.415353Z"
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "102.4412"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metrics.mean_squared_error(y_true=y_trues, y_pred=y_preds)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "deep-river",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
